plot_heatmaps.ipynb¶

This notebook makes heatmaps of entry and binding data from filtered data

  • Written by Brendan Larsen
In [1]:
# this cell is tagged as parameters for `papermill` parameterization
# input configs
altair_config = None
nipah_config = None

# input files
entropy_file = None

func_scores_E2_file = None
binding_E2_file = None
e2_low_func_file = None

func_scores_E3_file = None
binding_E3_file = None
e3_low_func_file = None

# output images
E2_binding_heatmap = None
E3_binding_heatmap = None
E2_entry_heatmap = None
E3_entry_heatmap = None
combined_entry_binding_contact = None
E3_entry_AA_prop_heatmap = None
E2_entry_AA_prop_heatmap = None
glycan_sites_img_heatmap = None
nipah_poly_sites_img_heatmap = None
HENV103_heatmap_plot = None
nAH1_heatmap_plot = None
HENV117_heatmap_plot = None
HENV32_heatmap_plot = None
HENV26_heatmap_plot = None
m102_heatmap_plot = None
In [2]:
# Parameters
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/heatmap_theme.py"
entropy_file = "results/entropy/entropy.csv"
func_scores_E2_file = "results/filtered_data/entry/e2_entry_filtered.csv"
func_scores_E3_file = "results/filtered_data/entry/e3_entry_filtered.csv"
binding_E2_file = "results/filtered_data/binding/e2_binding_filtered.csv"
binding_E3_file = "results/filtered_data/binding/e3_binding_filtered.csv"
e2_low_func_file = "results/filtered_data/binding/e2_low_binding_effect_filter.csv"
e3_low_func_file = "results/filtered_data/binding/e3_low_binding_effect_filter.csv"
E2_binding_heatmap = "results/images/E2_binding_heatmap.html"
E3_binding_heatmap = "results/images/E3_binding_heatmap.html"
E2_entry_heatmap = "results/images/E2_entry_heatmap.html"
E3_entry_heatmap = "results/images/E3_entry_heatmap.html"
combined_entry_binding_contact = (
    "results/images/combined_entry_binding_contact_heatmaps.html"
)
E3_entry_AA_prop_heatmap = "results/images/E3_entry_AA_prop_heatmap.html"
E2_entry_AA_prop_heatmap = "results/images/E2_entry_AA_prop_heatmap.html"
glycan_sites_img_heatmap = "results/images/glycan_sites_img_heatmap.html"
nipah_poly_sites_img_heatmap = "results/images/nipah_poly_sites_img_heatmap.html"
HENV103_heatmap_plot = "results/images/HENV103_heatmap_plot.html"
nAH1_heatmap_plot = "results/images/nAH1_heatmap_plot.html"
HENV117_heatmap_plot = "results/images/HENV117_heatmap_plot.html"
HENV32_heatmap_plot = "results/images/HENV32_heatmap_plot.html"
HENV26_heatmap_plot = "results/images/HENV26_heatmap_plot.html"
m102_heatmap_plot = "results/images/m102_heatmap_plot.html"
In [3]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
import sys

# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

# setup working directory
if os.getcwd() == "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/":
    pass
    print("Already in correct directory")
else:
    os.chdir("/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/")
    print("Setup in correct directory")

#import altair themes from /data/custom_analyses_data/theme.py and enable
sys.path.append('data/custom_analyses_data/')
import heatmap_theme
alt.themes.register('heatmap_theme', heatmap_theme.heatmap_theme)
alt.themes.enable('heatmap_theme')
Setup in correct directory
Out[3]:
ThemeRegistry.enable('heatmap_theme')
In [4]:
if nipah_config is None:
    ##hard paths in case don't want to run with snakemake
    print("loading hard paths")
    #altair_config = "data/custom_analyses_data/heatmap_theme.py"
    nipah_config = "nipah_config.yaml"
    entropy_file = "results/entropy/entropy.csv"

    # input files
    func_scores_E2_file = "results/filtered_data/entry/e2_entry_filtered.csv"
    binding_E2_file = "results/filtered_data/binding/e2_binding_filtered.csv"
    e2_low_func_file = "results/filtered_data/binding/e2_low_binding_effect_filter.csv"

    func_scores_E3_file = "results/filtered_data/entry/e3_entry_filtered.csv"
    binding_E3_file = "results/filtered_data/binding/e3_binding_filtered.csv"
    e3_low_func_file = "results/filtered_data/binding/e3_low_binding_effect_filter.csv"

    e3_low_func_file_ab = "results/filtered_data/escape/e3_low_mab_effect_filter.csv"
    antibody_file = "results/filtered_data/escape/mab_filter_concat.csv"

Read configs¶

In [5]:
with open(nipah_config) as f:
    config = yaml.safe_load(f)
In [6]:
# This function helps generate bounds for the heatmaps to make it even
# for start, end in [(71, 204), (204, 337), (337, 470), (470, 603)]
def generate_numbers(start=71, increment=65, final=602, count=8):
    numbers = []
    for _ in range(count):
        next_start = start + increment
        numbers.append((start, next_start))
        start = next_start
    # For the final tuple, the second number is fixed at 603
    numbers.append((start, final))
    return numbers


# Generating the numbers
generated_numbers = generate_numbers()
print(generated_numbers)
[(71, 136), (136, 201), (201, 266), (266, 331), (331, 396), (396, 461), (461, 526), (526, 591), (591, 602)]
In [7]:
# import filtered data
e3_low_func_file_ab = "results/filtered_data/escape/e3_low_mab_effect_filter.csv"
antibody_file = "results/filtered_data/escape/mab_filter_concat.csv"

e2_binding = pd.read_csv(binding_E2_file).round(2)
e2_func = pd.read_csv(func_scores_E2_file).round(2)
e2_low_func = pd.read_csv(e2_low_func_file).round(2)

e3_binding = pd.read_csv(binding_E3_file).round(2)
e3_func = pd.read_csv(func_scores_E3_file).round(2)
e3_low_func = pd.read_csv(e3_low_func_file).round(2)

e3_low_func_ab = pd.read_csv(e3_low_func_file_ab).round(2)
antibody_escape = pd.read_csv(antibody_file).round(2)
In [8]:
def prepare_distance(distance_file):
    # read in entropy data, calculated in different notebook
    #entropy = pd.read_csv(distance_file)
    df = distance_file[["site", "distance"]]
    df = df.dropna(subset=["site"])
    df["site"] = df["site"].astype("Int64")
    #df = df.rename(columns={"henipavirus_entropy": "entropy"})
    df["distance"] = df["distance"].round(2)
    df = df[["site", "distance"]].drop_duplicates()
    df["mutant"] = "distance"
    df["wildtype"] = ""
    df["type"] = "distance"
    df.rename(columns={"distance": "value"}, inplace=True)
    return df
    
def prepare_entropy():  # need to prepare entropy data for plotting on heatmap
    # read in entropy data, calculated in different notebook
    entropy = pd.read_csv(entropy_file)
    df = entropy[["site", "henipavirus_entropy"]]
    df = df.dropna(subset=["site"])
    df["site"] = df["site"].astype("Int64")
    df = df.rename(columns={"henipavirus_entropy": "entropy"})
    df["entropy"] = df["entropy"].round(2)
    df = df[["site", "entropy"]].drop_duplicates()
    df["mutant"] = "entropy"
    df["wildtype"] = ""
    df["type"] = "entropy"
    df.rename(columns={"entropy": "value"}, inplace=True)
    return df


def make_contact():
    df = pd.DataFrame(
        {
            "site": config["contact_sites"],
            "contact": [0.0] * len(config["contact_sites"]),
        }
    )
    # Renaming and restructuring the dataframe
    df["mutant"] = "receptor contact"
    df["wildtype"] = ""
    df["type"] = "receptor contact"
    df.rename(columns={"contact": "value"}, inplace=True)
    return df


# This gets called during heatmap generation
def make_empty_df(
    df,
    contact_df=None,
    entropy_df=None,
    contact_flag=None,
    entropy_flag=None,
    low_entry_df=None,
    binding_flag=None,
    escape_flag=None,
    distance_df=None,
):
    sites = range(71, 603)
    amino_acids = [
        "R",
        "K",
        "H",
        "D",
        "E",
        "Q",
        "N",
        "S",
        "T",
        "Y",
        "W",
        "F",
        "A",
        "I",
        "L",
        "M",
        "V",
        "G",
        "P",
        "C",
    ]
    # Create the combination of each site with each amino acid
    data = [{"site": site, "mutant": aa} for site in sites for aa in amino_acids]

    # Create the DataFrame
    empty_df = pd.DataFrame(data)

    if binding_flag:
        print("plotting binding")
        if low_entry_df is None:
            print("You indicated binding but did not provide a low_entry_df")
        all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
        df_test = all_sites_df.melt(
            id_vars=["site", "mutant", "wildtype"],
            value_vars=["binding_mean"],
            var_name="type",
            value_name="value",
        )
        low_entry_df = low_entry_df.rename(columns={"effect": "low_effect"})
        df_filter = low_entry_df.melt(
            id_vars=["site", "mutant", "wildtype"],
            value_vars=["low_effect"],
            var_name="type",
            value_name="value",
        )
        df_test = pd.concat([df_test, df_filter])
    elif escape_flag:
        print("plotting escape")
        if low_entry_df is None:
            print("You indicated escape but did not provide a low_entry_df")
        all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
        df_test = all_sites_df.melt(
            id_vars=["site", "mutant", "wildtype"],
            value_vars=["escape_mean"],
            var_name="type",
            value_name="value",
        )
        low_entry_df = low_entry_df.rename(columns={"effect": "low_effect"})
        df_filter = low_entry_df.melt(
            id_vars=["site", "mutant", "wildtype"],
            value_vars=["low_effect"],
            var_name="type",
            value_name="value",
        )
        df_test = pd.concat([df_test, df_filter])
    else:
        print("plotting entry")
        all_sites_df = pd.merge(empty_df, df, on=["site", "mutant"], how="left")
        df_test = all_sites_df.melt(
            id_vars=["site", "mutant", "wildtype"],
            value_vars=["effect"],
            var_name="type",
            value_name="value",
        )

    if contact_flag and entropy_flag is None:
        df_test = pd.concat([df_test], ignore_index=True)
    if contact_flag is True:
        df_test = pd.concat([df_test, contact_df], ignore_index=True)
    if entropy_flag is True:
        df_test = pd.concat([df_test, entropy_df], ignore_index=True)
    if entropy_flag and contact_flag is True:
        df_test = pd.concat([df_test, entropy_df, contact_df], ignore_index=True)
    if distance_df is not None:
        print('making a distance df')
        df_test = pd.concat([df_test,distance_df], ignore_index=True)
    return df_test
In [9]:
# Make the base heatmap. This contains information about the x_axis and heatmap_sites which are important for sorting them correctly.
def make_base_heatmap(df, heatmap_sites, x_axis):
    base = (
        alt.Chart(df)
        .encode(
            x=alt.X("site:O", title="Site", sort=heatmap_sites, axis=x_axis),
            y=alt.Y(
                "mutant",
                title="Amino Acid",
                sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
                axis=alt.Axis(grid=False),
            ),
        )
        .properties(
            width=alt.Step(10),
            height=alt.Step(10),
        )
    )
    return base


# This makes an 'empty' heatmap that shows sites that were not observed as some color (default:gray)
def make_empty_heatmap(base, background_color):
    chart_empty = (
        base.mark_rect(color=background_color)
        .encode(tooltip=["site", "mutant"])
        .transform_filter(
            ((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == "escape_mean"))
            & (alt.datum.value == None)
        )
    )
    return chart_empty


# This makes the white squares and X for the wildtype amino acids
def make_wildtype_heatmap(unique_wildtypes_df, strokewidth_size, heatmap_sites):
    wildtype_layer_box = (
        alt.Chart(unique_wildtypes_df)
        .mark_rect(color="white", stroke="#000000", strokeWidth=strokewidth_size)
        .encode(
            x=alt.X("site:O", sort=heatmap_sites),
            y=alt.Y(
                "wildtype",
                sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
            ),
            tooltip=["site", "wildtype"],
        )
        .transform_filter(
            ((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == 'low_effect') | (alt.datum.type == "escape_mean"))
            & (alt.datum.wildtype != None)
            & (alt.datum.value != None)
        )
    )
    wildtype_layer = (
        alt.Chart(unique_wildtypes_df)
        .mark_text(color="black", text="X", size=8, align="center", baseline="middle",dy=1)
        .encode(
            x=alt.X("site:O", sort=heatmap_sites),
            y=alt.Y(
                "wildtype",
                sort=alt.EncodingSortField(field="mutant_rank", order="ascending"),
            ),
            tooltip=["site", "wildtype"],
        )
        .transform_filter(
            ((alt.datum.type == "effect") | (alt.datum.type == "binding_mean") | (alt.datum.type == 'low_effect') | (alt.datum.type == "escape_mean"))
            & (alt.datum.wildtype != None)
            & (alt.datum.value != None)
        )
    )
    return wildtype_layer_box, wildtype_layer


# This makes the actual effect heatmap, and adds a bar for the legend if its the first time through the loop
def create_effect_chart(
    base, color_scale_effect, strokewidth_size, legend_title=None, effect_legend=None
):
    chart = (
        base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size)
        .encode(
            color=alt.condition(
                '(datum.type == "effect" | datum.type == "binding_mean" | datum.type == "escape_mean")',
                alt.Color("value:Q", scale=color_scale_effect, legend=effect_legend),
                alt.value("transparent"),
            ),
            tooltip=["site", "mutant", "wildtype", "value"],
        )
        .transform_filter((alt.datum.wildtype != "") & (alt.datum.wildtype != None))
    )
    return chart


# This makes a chart for the entropy values
def create_entropy_chart(
    base, color_scale_entropy, strokewidth_size, legend_title=None, entropy_legend=None
):
    chart = base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size).encode(
        color=alt.condition(
            'datum.mutant == "entropy"',
            alt.Color("value:Q", scale=color_scale_entropy, legend=entropy_legend),
            alt.value("transparent"),
        ),
        tooltip=["site", "mutant", "wildtype", "value"],
    )
    return chart
    
def create_distance_chart(
    base, color_scale_entropy, strokewidth_size, legend_title=None, distance_legend=None
):
    chart = base.mark_rect(stroke="#000000", strokeWidth=strokewidth_size).encode(
        color=alt.condition(
            'datum.mutant == "distance"',
            alt.Color("value:Q", scale=alt.Scale(scheme="purples", domain=[0, 10], reverse=True),legend=distance_legend),
            alt.value("transparent"),
        ),
        tooltip=["site", "mutant", "wildtype", "value"],
    )
    return chart

# This makes a chart for the contact sites
def create_contact_chart(base):
    chart_contact = (
        base.mark_rect(color="black")
        .encode(tooltip=["site"])
        .transform_filter((alt.datum.mutant == "receptor contact"))
    )
    return chart_contact

# Masks sites with low cell entry scores for binding or escape
def make_low_effect_heatmap(base, strokewidth_size, heatmap_sites):
    chart_filtered = (
        base.mark_rect(color="#808285", stroke="#000000", strokeWidth=strokewidth_size) ##939598
        .encode()
        .transform_filter(alt.datum.type == "low_effect")
    )
    return chart_filtered


# This compiles all the different charts and returns a single chart
def compile_chart(
    df,
    heatmap_sites,
    unique_wildtypes_df,
    x_axis,
    background_color,
    color_scale_effect,
    color_scale_entropy,
    strokewidth_size=None,
    legend_title=None,
    effect_legend=None,
    entropy_legend=None,
    binding_flag=None,
    escape_flag=None,
    distance_df=None,
    distance_legend=None
):
    base = make_base_heatmap(df, heatmap_sites, x_axis)
    chart_empty = make_empty_heatmap(base, background_color)
    chart_contact = create_contact_chart(base)
    chart_effect = create_effect_chart(
        base, color_scale_effect, strokewidth_size, legend_title, effect_legend
    )
    chart_entropy = create_entropy_chart(
        base, color_scale_entropy, strokewidth_size, legend_title, entropy_legend
    )
    chart_distance = create_distance_chart(
        base, color_scale_entropy, strokewidth_size, legend_title, distance_legend
    )
    wildtype_layer_box, wildtype_layer = make_wildtype_heatmap(
        unique_wildtypes_df, strokewidth_size, heatmap_sites
    )
    if binding_flag:
        print("Now making dataframe for binding")
        low_entry_heatmap = make_low_effect_heatmap(
            base, strokewidth_size, heatmap_sites
        )
        chart = alt.layer(
            chart_empty,
            chart_effect,
            low_entry_heatmap,
            chart_entropy,
            chart_contact,
            wildtype_layer_box,
            wildtype_layer,
        ).resolve_scale(y="shared", x="shared", color="independent")
    elif escape_flag:
        print("Now making dataframe for escape")
        low_entry_heatmap = make_low_effect_heatmap(
            base, strokewidth_size, heatmap_sites
        )
        chart = alt.layer(
            chart_empty,
            chart_effect,
            low_entry_heatmap,
            chart_distance,
            chart_entropy,
            chart_contact,
            wildtype_layer_box,
            wildtype_layer,
        ).resolve_scale(y="shared", x="shared", color="independent")
    else:
        chart = alt.layer(
            chart_empty,
            chart_effect,
            chart_entropy,
            chart_contact,
            wildtype_layer_box,
            wildtype_layer,
        ).resolve_scale(y="shared", x="shared", color="independent")

    return chart
In [10]:
def plot_entry_heatmap(
    df,
    legend_title,
    null_color=None,
    ranges=None,
    effect_color=None,
    entropy_color=None,
    strokewidth_size=None,
    custom_y_axis_order=None,
    entropy_flag=None,
    contact_flag=None,
    specific_sites=None,
    specific_sites_name=None,
    low_entry_df=None,
    binding_flag=None,
    escape_flag=None,
    custom_domain=None,
    escape_distance=None,
):
    """
    Generates a customizable heatmap for deep mutational scanning (DMS) data visualization.

    Parameters:
    - df (DataFrame): The data frame containing the data to be visualized. It must include the columns 'site', 'mutant', 'value', and 'wildtype'.
    - legend_title (str): The title of the heatmap legend.
    - null_color (str, optional): Color for mutants with no observations. Default is 'gray'.
    - ranges (list of tuples, optional): Defines the ranges for site wrapping on the heatmap. If not provided, a default range is used.
    - effect_color (str, optional): Color scheme for effect values. Default is 'red-blue'.
    - entropy_color (str, optional): Color scheme for entropy values. Default is 'purples'.
    - strokewidth_size (float, optional): The width of the stroke used in the heatmap. Default size is not specified.
    - custom_y_axis_order (list, optional): Specifies a custom order for the y-axis, overriding the default amino acid order.
    - entropy_flag (bool, optional): If True, sequence entropy is included in the heatmap. Default is False.
    - contact_flag (bool, optional): If True, contact sites are included in the heatmap. Default is False.
    - specific_sites (list, optional): Specifies a subset of sites to be plotted. If None, all sites are plotted using wrapping. Default is None.
    - specific_sites_name (str, optional): A title to display at the top of the heatmap for specific sites. Default is None.
    - low_entry_df (DataFrame,optional): If given, will use different color to show sites with low entry scores (Used for Binding Score Heatmaps)
    - binding_flag (bool, optional): If True, will plot binding instead of entry. Must be used with low_entry_df to mask low cell entry mutants
    - custom_domain (list, optional): Give custom domain used for coloring. If None, will use default [-4,2.5]
    - escape_flag (bool,optional): If True, will plot escape instead of entry. Must be used with low_entry_df to mask low cell entry mutants
    - escape_distance (DataFrame, optional): give distance file for residues

    Returns:
    An Altair chart object representing the generated heatmap. This chart can be further customized or directly displayed in Jupyter notebooks or other compatible environments.
    """

    if contact_flag:
        contact_df = make_contact()
    else:
        contact_df = None
    if entropy_flag is True:
        entropy_df = prepare_entropy()
    else:
        entropy_df = None
    if escape_distance is not None:
        distance_df = prepare_distance(escape_distance)
    else: 
        distance_df = None

    # Make the dataframes for plotting.
    empty_df = make_empty_df(
        df,
        contact_df=contact_df,
        entropy_df=entropy_df,
        distance_df=distance_df,
        contact_flag=contact_flag,
        entropy_flag=entropy_flag,
        low_entry_df=low_entry_df,
        binding_flag=binding_flag,
        escape_flag=escape_flag,
    )

    # Define the base order list
    base_order = [
        "R",
        "K",
        "H",
        "D",
        "E",
        "Q",
        "N",
        "S",
        "T",
        "Y",
        "W",
        "F",
        "A",
        "I",
        "L",
        "M",
        "V",
        "G",
        "P",
        "C",
    ]

    # Initialize custom_order with custom_y_axis_order or base_order based on custom_y_axis_order's value
    custom_order = (
        custom_y_axis_order if custom_y_axis_order is not None else base_order
    )
    # Prepend conditions based on flags
    if entropy_flag and contact_flag:
        # Both flags are true, prepend both "contact" and "entropy"
        custom_order = ["contact", "entropy"] + custom_order
    elif entropy_flag:
        # Only entropy_flag is true, prepend "entropy"
        custom_order = ["entropy"] + custom_order
    elif contact_flag:
        # Only contact_flag is true, prepend "contact"
        custom_order = ["contact"] + custom_order
    elif distance_df is not None:
        custom_order = ["distance"] + custom_order

    # Optional parameters
    if null_color is None:
        background_color = "#d1d3d4"
    else:
        background_color = null_color

    # Sites for wrapping heatmap correctly
    if ranges is None:
        full_ranges = [
            list(range(start, end))
            for start, end in [
                (71, 181),
                (181, 291),
                (291, 401),
                (401, 511),
                (511, 603),
            ]
        ]
    else:
        full_ranges = ranges

    # effect_color
    if custom_domain:
        if effect_color is None:
            color_scale_effect = alt.Scale(
                scheme="redblue", domainMid=0, domain=custom_domain
            )
        else:
            color_scale_effect = alt.Scale(
                scheme=effect_color, domainMid=0, domain=custom_domain
            )
    else:
        if effect_color is None:
            color_scale_effect = alt.Scale(
                scheme="redblue", domainMid=0, domain=[-4, 2.5]
            )
        else:
            color_scale_effect = alt.Scale(
                scheme=effect_color, domainMid=0, domain=[-4, 2.5]
            )

    # entropy_color
    if entropy_color is None:
        color_scale_entropy = alt.Scale(
            scheme="purples", domain=[0, 1.5], reverse=True)
    else:
        color_scale_entropy = alt.Scale(
            scheme=entropy_color, domain=[0, 1.5], reverse=True
        )
    
    # strokewidth size
    if strokewidth_size is None:
        strokewidth_size = 0.25
    else:
        strokewidth_size = strokewidth_size

    def determine_sorting_order(df):
        # Sort the dataframe by 'site' to ensure that duplicates are detected correctly.
        final_df = df.sort_values("site")
        sort_order = {mutant: i for i, mutant in enumerate(custom_order)}
        final_df["mutant_rank"] = final_df["mutant"].map(sort_order)
        # Map the 'mutant' column to these ranks
        # Now sort the dataframe by this rank
        final_df = final_df.sort_values("mutant_rank")
        sites = sorted(final_df["site"].unique(), key=lambda x: float(x))
        return final_df, sites, sort_order

    heatmap_df, heatmap_sites, sort_order = determine_sorting_order(empty_df)

    # container to hold the charts
    charts = []

    if specific_sites:
        # Filter the heatmap to only show certain sites
        subset_df = heatmap_df[heatmap_df["site"].isin(specific_sites)]

        # Need to do independently for wildtype here for individual sites
        unique_wildtypes_df = subset_df.drop_duplicates(
            subset=["site", "wildtype"])
        unique_wildtypes_df = unique_wildtypes_df.sort_values("site")
        sort_order = {mutant: i for i, mutant in enumerate(custom_order)}
        unique_wildtypes_df["mutant_rank"] = unique_wildtypes_df["wildtype"].map(
            sort_order
        )
        unique_wildtypes_df = unique_wildtypes_df.sort_values("mutant_rank")

        effect_legend = alt.Legend(
            title=legend_title,
            titleAlign="left",
            tickCount=3,
            gradientLength=75,
            titleAnchor="start",
            offset=5,
            titlePadding=2,
            titleOrient='top',
            gradientThickness=10,
        )
        entropy_legend = (
            alt.Legend(title="Henipavirus Entropy", values=[0, 1, 2])
            if entropy_flag is True
            else None
        )

        # Setup x-axis labeling
        x_axis = alt.Axis(
            labelAngle=-90,
            title="Site",
            labels=True,
        )
        # Run the main heatmap compiler function
        chart = compile_chart(
            subset_df,
            heatmap_sites,
            unique_wildtypes_df,
            x_axis,
            background_color,
            color_scale_effect=color_scale_effect,
            color_scale_entropy=color_scale_entropy,
            strokewidth_size=strokewidth_size,
            legend_title=legend_title,
            effect_legend=effect_legend,
            entropy_legend=entropy_legend,
            binding_flag=binding_flag,
            escape_flag=escape_flag,
        )
        # Since this is a single chart, I don't know why I need to do this, but I seem to get errors if I don't append and then do alt.vconcat below. I get why I need to do this for multiple heatmaps in a for loop, but not here. Leaving in.
        charts.append(chart)
        if specific_sites_name:
            specific_sites_name = specific_sites_name
        else:
            specific_sites_name = ""
        combined_charts = alt.vconcat(*charts, title=specific_sites_name).resolve_scale(
            y="shared", x="shared", color="shared"
        )
        return combined_charts
    else:
        for idx, subset in enumerate(full_ranges):
            # Flags for showing the legend only the first time
            subset_df = heatmap_df[
                heatmap_df["site"].isin(subset)
            ]  # for the wrapping of sites
            unique_wildtypes_df = subset_df.drop_duplicates(
                subset=["site", "wildtype"]
            )  # for the wildtype mapping

            # Keep track of where in the loop we are for plotting
            is_last_plot = idx == len(full_ranges) - 1

            effect_legend = (
                alt.Legend(
                    title=legend_title,
                    direction="horizontal",
                    gradientLength=100,
                    titleAnchor="middle",
                    tickCount=3,
                    labelAlign="left",
                    titleAlign='center',
                    
                )
                if is_last_plot
                else None
            )
            entropy_legend = (
                alt.Legend(
                    title="Henipavirus Entropy",
                    direction="horizontal",
                    gradientLength=100,
                    titleAnchor="middle",
                    values=[0, 1, 2],
                    labelAlign="center",
                )
                if is_last_plot and entropy_flag is True
                else None
            )
            if distance_df is not None:
                distance_legend = (
                    alt.Legend(
                        title="Antibody distance",
                        direction="horizontal",
                        gradientLength=100,
                        titleAnchor="middle",
                        labelAlign="center",
                    )
                    if is_last_plot is True
                    else None
                )
            else:
                distance_legend = None

            x_axis = alt.Axis(
                labelAngle=-90,
                labelExpr="datum.value % 10 === 0 ? datum.value : ''",
                title="Site" if is_last_plot else None,
                labels=True,
            )
            chart = compile_chart(
                subset_df,
                heatmap_sites,
                unique_wildtypes_df,
                x_axis,
                background_color,
                color_scale_effect=color_scale_effect,
                color_scale_entropy=color_scale_entropy,
                strokewidth_size=strokewidth_size,
                legend_title=legend_title,
                effect_legend=effect_legend,
                entropy_legend=entropy_legend,
                distance_legend=distance_legend,
                binding_flag=binding_flag,
                escape_flag=escape_flag
            )
            charts.append(chart)
        combined_chart = alt.vconcat(
            *charts, spacing=3, title=alt.Title(f"{legend_title}",subtitle=['Interactive heatmap of Nipah RBP mutational effects relative to unmutated reference sequence'])
        ).resolve_scale(y="shared", x="independent", color="shared").configure_axisY(titlePadding=-50)
        return combined_chart

Now that we have heatmap function setup, make heatmaps:¶

First, do binding heatmaps¶

In [11]:
E2_binding_heatmap_full = plot_entry_heatmap(
    df=e2_binding,
    legend_title="bEFNB2 Binding",
    null_color=config["background_color"],
    effect_color=config["effect_color"],
    entropy_color=config["entropy_color"],
    strokewidth_size=config["strokewidth_size"],
    contact_flag=True,
    low_entry_df=e2_low_func,
    binding_flag=True,
    custom_domain=[-6, 2.5],
)
E2_binding_heatmap_full.display()
if combined_entry_binding_contact is not None:
    E2_binding_heatmap_full.save(E2_binding_heatmap)
plotting binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding
In [12]:
E3_binding_heatmap_full = plot_entry_heatmap(
    df=e3_binding,
    legend_title="bEFNB3 Binding",
    null_color=config["background_color"],
    effect_color=config["effect_color"],
    entropy_color=config["entropy_color"],
    strokewidth_size=config["strokewidth_size"],
    contact_flag=True,
    low_entry_df=e3_low_func,
    binding_flag=True,
    custom_domain=[-2, 2],
)
E3_binding_heatmap_full.display()
if combined_entry_binding_contact is not None:
    E3_binding_heatmap_full.save(E3_binding_heatmap)
plotting binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding
Now making dataframe for binding

Then, make entry heatmaps:¶

In [13]:
E2_entry_heatmap_full = plot_entry_heatmap(
    df=e2_func,
    legend_title="CHO-bEFNB2 Entry",
    null_color=config["background_color"],
    effect_color=config["effect_color"],
    strokewidth_size=config["strokewidth_size"],
    contact_flag=True,
)
E2_entry_heatmap_full.display()
if combined_entry_binding_contact is not None:
    E2_entry_heatmap_full.save(E2_entry_heatmap)
plotting entry
In [14]:
E3_entry_heatmap_full = plot_entry_heatmap(
    df=e3_func,
    legend_title="CHO-bEFNB3 Entry",
    null_color=config["background_color"],
    effect_color=config["effect_color"],
    strokewidth_size=config["strokewidth_size"],
    contact_flag=True,
)
E3_entry_heatmap_full.display()
if combined_entry_binding_contact is not None:
    E3_entry_heatmap_full.save(E3_entry_heatmap)
plotting entry

Then, make entry and binding heatmaps for contact sites:¶

Make heatmaps by amino acid property for binding¶

In [15]:
hydrophobic_AA = ["A", "V", "L", "I", "M"]
aromatic_AA = ["Y", "W", "F"]
positive_AA = ["K", "R", "H"]
negative_AA = ["E", "D"]
hydrophilic_AA = ["S", "T", "N", "Q"]
special_AA = ["C","G","P"]

def find_aa_wildtype_sites(df, aa_type):
    aa_list = list(df[df["wildtype"].isin(aa_type)]["site"].unique())
    return aa_list


def make_AA_lists(df):
    hydrophobic_AA_list = find_aa_wildtype_sites(df, hydrophobic_AA)
    aromatic_AA_list = find_aa_wildtype_sites(df, aromatic_AA)
    positive_AA_list = find_aa_wildtype_sites(df, positive_AA)
    negative_AA_list = find_aa_wildtype_sites(df, negative_AA)
    hydrophilic_AA_list = find_aa_wildtype_sites(df, hydrophilic_AA)
    special_AA_list = find_aa_wildtype_sites(df, special_AA)

    all_AA = [
        hydrophobic_AA_list,
        aromatic_AA_list,
        positive_AA_list,
        negative_AA_list,
        hydrophilic_AA_list,
        special_AA_list,
    ]
    return all_AA


AA_names = ["Hydrophobic", "Aromatic", "Positive", "Negative", "Hydrophilic","Special"]

def make_aa_property_charts(
    df, df_low, sites_list, sites_name, custom_domain, legend_name
):
    empty_charts = []
    for aa_type, name in zip(sites_list, sites_name):
        aa_property = plot_entry_heatmap(
            df=df,
            legend_title=legend_name,
            null_color=config["background_color"],
            effect_color=config["effect_color"],
            entropy_color=config["entropy_color"],
            strokewidth_size=config["strokewidth_size"],
            specific_sites=aa_type,
            specific_sites_name=name,
            low_entry_df=df_low,
            binding_flag=True,
            custom_domain=custom_domain,
        )
        empty_charts.append(aa_property)
    combined_chart = alt.vconcat(*empty_charts, spacing=3)
    return combined_chart


E2_binding_AA_prop = make_aa_property_charts(
    e2_binding, e2_low_func, make_AA_lists(e2_binding), AA_names, [-6, 2.5], "bEFNB2 Binding"
)

E2_binding_AA_prop.display()
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
In [16]:
E3_binding_AA_prop = make_aa_property_charts(
    e3_binding, e3_low_func, make_AA_lists(e3_binding), AA_names, [-2, 2], "bEFNB3 Binding"
)

E3_binding_AA_prop.display()
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding

Now entry by amino acid property¶

In [17]:
def make_aa_property_charts_entry(
    df, sites_list, sites_name, legend_name
):
    empty_charts = []
    for aa_type, name in zip(sites_list, sites_name):
        aa_property = plot_entry_heatmap(
            df=df,
            legend_title=legend_name,
            null_color=config["background_color"],
            effect_color=config["effect_color"],
            entropy_color=config["entropy_color"],
            strokewidth_size=config["strokewidth_size"],
            specific_sites=aa_type,
            specific_sites_name=name,
            contact_flag = True,
        )
        empty_charts.append(aa_property)
    combined_chart = alt.vconcat(*empty_charts, spacing=3)
    return combined_chart
In [18]:
E2_entry_AA_prop = make_aa_property_charts_entry(
    e2_func, make_AA_lists(e2_func),AA_names, "CHO-bEFNB2 entry"
).configure_title(offset=5)
E2_entry_AA_prop.display()
if combined_entry_binding_contact is not None:
    E2_entry_AA_prop.save(E2_entry_AA_prop_heatmap)
plotting entry
plotting entry
plotting entry
plotting entry
plotting entry
plotting entry
In [19]:
E3_entry_AA_prop = make_aa_property_charts_entry(
    e3_func, make_AA_lists(e2_func), AA_names, "CHO-bEFNB3 entry"
).configure_title(offset=5)
E3_entry_AA_prop.display()
if combined_entry_binding_contact is not None:
    E3_entry_AA_prop.save(E3_entry_AA_prop_heatmap)
plotting entry
plotting entry
plotting entry
plotting entry
plotting entry
plotting entry

Now for specific sites I want to focus on¶

In [20]:
def make_entry_binding_heatmaps_for_specific_sites(title,sites_list):
    specific_sites_E2_binding = plot_entry_heatmap(
        df=e2_binding,
        legend_title="B2",
        null_color=config["background_color"],
        effect_color=config["effect_color"],
        entropy_color=config["entropy_color"],
        strokewidth_size=config["strokewidth_size"],
        specific_sites=sites_list,
        low_entry_df=e2_low_func,
        binding_flag=True,
        custom_domain=[-6, 2.5],
    )
    specific_sites_E3_binding = plot_entry_heatmap(
        df=e3_binding,
        legend_title="B3",
        null_color=config["background_color"],
        effect_color=config["effect_color"],
        entropy_color=config["entropy_color"],
        strokewidth_size=config["strokewidth_size"],
        specific_sites=sites_list,
        low_entry_df=e3_low_func,
        binding_flag=True,
        custom_domain=[-2, 2],
    )
    specific_sites_E2_entry = plot_entry_heatmap(
        df=e2_func,
        legend_title="B2",
        null_color=config["background_color"],
        effect_color=config["effect_color"],
        entropy_color=config["entropy_color"],
        strokewidth_size=config["strokewidth_size"],
        specific_sites=sites_list,
    )
    specific_sites_E3_entry = plot_entry_heatmap(
        df=e3_func,
        legend_title="B3",
        null_color=config["background_color"],
        effect_color=config["effect_color"],
        entropy_color=config["entropy_color"],
        strokewidth_size=config["strokewidth_size"],
        specific_sites=sites_list,
    
    )
    combo_binding_img = alt.vconcat(specific_sites_E2_binding,specific_sites_E3_binding).properties(title='Binding')
    combo_entry_img = alt.vconcat(specific_sites_E2_entry,specific_sites_E3_entry).properties(title='Entry')
    combo_combo = alt.hconcat(combo_entry_img,combo_binding_img).properties(title=title).configure_title(offset=15)
    return combo_combo
In [21]:
dimer_assm_chainA = make_entry_binding_heatmaps_for_specific_sites('ChainA Dimer Interface',[160, 162, 163, 164, 165, 166, 169, 171, 205, 206, 208, 247, 249, 256, 258, 259, 260, 261, 266, 267, 268, 270, 323, 324, 325, 331])
dimer_assm_chainA.display()
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry
In [22]:
dimer_assm_chainB = make_entry_binding_heatmaps_for_specific_sites('ChainB Dimer Interface',[167, 168, 170, 171, 172, 202, 203, 204, 205, 208, 210, 212, 237, 238, 239, 240, 242, 258, 584, 589, 591]) 
dimer_assm_chainB.display()
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry
In [23]:
BLI_mutant_sites = make_entry_binding_heatmaps_for_specific_sites('Sites Chosen for BLI',[244,305,492,507,530,553,555,559,588]) 
BLI_mutant_sites.display()
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry
In [24]:
contact_sites_img = make_entry_binding_heatmaps_for_specific_sites('Contact Sites',config['contact_sites'])
contact_sites_img.display()
if combined_entry_binding_contact is not None:
    contact_sites_img.save(combined_entry_binding_contact)
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry
In [25]:
stalk_sites_img = make_entry_binding_heatmaps_for_specific_sites('Stalk',list(range(72, 148)))
stalk_sites_img.display()
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry
In [26]:
neck_sites_img = make_entry_binding_heatmaps_for_specific_sites('Neck',list(range(148, 166)))
neck_sites_img.display()
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry
In [27]:
beta_sheet_sites_img = make_entry_binding_heatmaps_for_specific_sites('Beta Sheets',config['beta_sheet'])
beta_sheet_sites_img.display()
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry
In [28]:
nipah_poly_sites_img = make_entry_binding_heatmaps_for_specific_sites('Nipah polymorphic sites',config['nipah_poly'])
nipah_poly_sites_img.display()
if combined_entry_binding_contact is not None:
    nipah_poly_sites_img.save(nipah_poly_sites_img_heatmap)
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry
In [29]:
glycan_sites_img = make_entry_binding_heatmaps_for_specific_sites('Glycosylation sites',config['glycans'])
glycan_sites_img.display()
if combined_entry_binding_contact is not None:
    glycan_sites_img.save(glycan_sites_img_heatmap)
plotting binding
Now making dataframe for binding
plotting binding
Now making dataframe for binding
plotting entry
plotting entry

Make antibody escape heatmaps¶

In [30]:
m102 = antibody_escape[antibody_escape['ab'] == 'm102.4']
m102_dist = pd.read_csv("results/distances/df_m102_atomic_distances.csv")

HENV26 = antibody_escape[antibody_escape['ab'] == 'HENV-26']
HENV26_dist = pd.read_csv("results/distances/df_HENV26_atomic_distances.csv")

HENV32 = antibody_escape[antibody_escape['ab'] == 'HENV-32']
HENV32_dist = pd.read_csv("results/distances/df_HENV32_atomic_distances.csv")

nAH1 = antibody_escape[antibody_escape['ab'] == 'nAH1.3']
nAH1_dist = pd.read_csv("results/distances/df_nAH_atomic_distances.csv")

HENV117 = antibody_escape[antibody_escape['ab'] == 'HENV-117']


HENV103 = antibody_escape[antibody_escape['ab'] == 'HENV-103']

def make_escape_heatmaps(df,dist,title):
    escape_heatmap = plot_entry_heatmap(
        df=df,
        legend_title=f"Escape from {title}",
        null_color=config["background_color"],
        effect_color=config["effect_color"],
        entropy_color=config["entropy_color"],
        strokewidth_size=config["strokewidth_size"],
        low_entry_df=e3_low_func_ab,
        escape_flag=True,
        custom_domain=[-2, 2],
        contact_flag=False,
        escape_distance = dist,
    )
    escape_heatmap = escape_heatmap.configure_axisY(titlePadding=-10)
    return escape_heatmap
In [31]:
m102_heatmap = make_escape_heatmaps(m102,m102_dist,'m102.4')
m102_heatmap.save(m102_heatmap_plot)
plotting escape
making a distance df
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
In [32]:
HENV26_heatmap = make_escape_heatmaps(HENV26,HENV26_dist,'HENV-26')
HENV26_heatmap.save(HENV26_heatmap_plot)
plotting escape
making a distance df
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
In [33]:
HENV32_heatmap = make_escape_heatmaps(HENV32,HENV32_dist,'HENV-32')
HENV32_heatmap.save(HENV32_heatmap_plot)
plotting escape
making a distance df
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
In [34]:
nAH1_heatmap = make_escape_heatmaps(nAH1,nAH1_dist,'nAH1.3')
nAH1_heatmap.save(nAH1_heatmap_plot)
plotting escape
making a distance df
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
In [35]:
HENV117_heatmap = make_escape_heatmaps(HENV117,dist=None,title='HENV-117')
HENV117_heatmap = HENV117_heatmap.configure_axisY(titlePadding=4)
HENV117_heatmap.save(HENV117_heatmap_plot)
plotting escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
In [36]:
HENV103_heatmap = make_escape_heatmaps(HENV103,dist=None,title='HENV-103')
HENV103_heatmap = HENV103_heatmap.configure_axisY(titlePadding=4)
HENV103_heatmap.save(HENV103_heatmap_plot)
plotting escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
Now making dataframe for escape
In [ ]: